/**
 * 
 */
package com.attilax.dataspider;

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeoutException;




import java.util.concurrent.atomic.AtomicInteger;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;



import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.attilax.core;
import com.attilax.tryX;
import com.attilax.asyn.AsynUtil;
import com.attilax.concur.TaskUtil;
import com.attilax.curr.PoolUtil;
import com.attilax.device.HardWareUtils;
import com.attilax.device.PerfUtil;
import com.attilax.dsl.VarUtil;
import com.attilax.exception.ExUtil;
import com.attilax.fileTrans.ConnEx;
import com.attilax.img.PngFormatEx;
import com.attilax.img.imgx;
import com.attilax.io.filex;
import com.attilax.io.pathx;
import com.attilax.lang.text.strUtil;
import com.attilax.lbs.NoRztEx;
import com.attilax.net.HttpUtil;
import com.attilax.net.websitex;
import com.attilax.util.CfgService;
import com.attilax.util.urlUtil;
import com.attilax.web.UrlX;
import com.google.common.collect.Lists;

/**
 * @author ASIMO
 *
 */
public class TsaolyoNetDataSpider extends PicSpider {
	public static final Logger logger = LoggerFactory.getLogger(TsaolyoNetDataSpider.class);
	private     AtomicInteger picSleep=new AtomicInteger();  //mill
	private AtomicInteger artSleep=new AtomicInteger(); ; //mill
	
	int artPoolTaskCount ;
	int PicPoolTaskCount ;
	public int pages;
	public int pageStart;
	public String picSaveDir;
	private int startPage;
	private int endPage;
	TaskExeReport rpt = new TaskExeReport();

	/**
	 * Command-line entry point: loads {@code spider.ini} from
	 * {@code pathx.appPath_webPrjMode()}, configures a spider from it and runs
	 * {@link #exec()}.
	 * <p>
	 * The sleep and pool-size settings are looked up under keys prefixed with the
	 * lower-cased value of {@code HardWareUtils#getSn()} ({@code <sn>_art_sleep},
	 * {@code <sn>_pic_sleep}, {@code <sn>_art_count}, {@code <sn>_pic_count});
	 * {@code dbg_sleep}, {@code startPage} and {@code endPage} are global keys.
	 *
	 * @author attilax
	 * @param args unused
	 * @throws IllegalStateException if a required configuration key is missing
	 *             or is not an integer
	 * @since p17 d_b_0
	 */
	public static void main(String[] args) {
		TsaolyoNetDataSpider x = new TsaolyoNetDataSpider();

		String cfg = filex.read(pathx.appPath_webPrjMode() + "/spider.ini");
		CfgService cs = new CfgService().read(cfg);
		Map<?, ?> cfg_map = cs.m;
		String sn = new HardWareUtils().getSn().toLowerCase();

		// start-up delay taken from the config before anything else happens
		TaskUtil.sleep_sec_throwEx(cfgInt(cfg_map, "dbg_sleep"));

		x.artSleep.set(cfgInt(cfg_map, sn + "_art_sleep"));
		x.picSleep.set(cfgInt(cfg_map, sn + "_pic_sleep"));
		x.artPoolTaskCount = cfgInt(cfg_map, sn + "_art_count");
		x.PicPoolTaskCount = cfgInt(cfg_map, sn + "_pic_count");
		x.picSaveDir = "c:\\0picSaveDir";
		x.startPage = cfgInt(cfg_map, "startPage");
		x.endPage = cfgInt(cfg_map, "endPage");

		x.exec();

		logger.info("--fi0055667788");
	}

	/**
	 * Reads one integer setting, failing with a message that names the key
	 * instead of a bare NullPointerException / NumberFormatException.
	 *
	 * @param cfg parsed configuration map
	 * @param key setting name
	 * @return the setting parsed as an int (surrounding whitespace ignored)
	 * @throws IllegalStateException if the key is absent or not an integer
	 */
	private static int cfgInt(Map<?, ?> cfg, String key) {
		Object raw = cfg.get(key);
		if (raw == null) {
			throw new IllegalStateException("spider.ini is missing required key: " + key);
		}
		try {
			return Integer.parseInt(raw.toString().trim());
		} catch (NumberFormatException e) {
			throw new IllegalStateException(
					"spider.ini key '" + key + "' is not an integer: " + raw, e);
		}
	}

	/**
	 * Manual scratch check: runs the parsing helpers against HTML snapshots on
	 * the local disk and then downloads one sample picture.
	 *
	 * @param x spider instance to exercise
	 */
	private static void t2(TsaolyoNetDataSpider x) {
		// article links from a saved listing page
		String listingPath = "c:\\new 36.html";
		String listingHtml = filex.read(listingPath, "utf8");
		List articleUrls = x.getArtListByPagehtml(listingHtml);

		// relative picture path resolved against an article URL
		String samplePage = "http://view.news.qq.com/original/intouchtoday/n3660.html";
		String resolvedPic = x.getAbsUrlPic(samplePage,
				"./art.net_files/22088668b119b1691940c03f61ef6ea5a08094fc.jpg");
		logger.info(resolvedPic);

		// picture URLs from a saved article page
		String articleHtml = filex.read("c:\\art.net.html", "gbk");
		List pictureUrls = x.getPics_byHtml(articleHtml,
				"http://cl.cmcher.com/htm_data/16/1609/2082995.html");
		System.out.println(pictureUrls);

		// single download into the "tt" sub-directory
		String samplePicture = "http://img03.cweb-pix.com/images/2016/02/18/220886248d19f404ee8f601798bea7e4edb4377d.jpg";
		x.downPic(samplePicture, "tt");
	}

	filex fx;
	public String fileName;
	public String kw;
	private ExecutorService picPool;
	private int min_artSleep=300;
	PerfUtil perfUtil = new PerfUtil();
	/**
	 * Runs the whole crawl: creates the article and picture thread pools, walks
	 * every listing page returned by {@link #getpageUrls()}, then blocks until
	 * the article pool reports shutdown and the finished-picture counter in
	 * {@code rpt} has (almost) caught up with the submitted-picture counter.
	 * <p>
	 * A failure on one listing page is logged and the remaining pages are still
	 * processed.
	 *
	 * @return always {@code null}
	 */
	public String exec() {

		logger.info("--start now");
		final int minPicSleep = 1000;

		// Throttle callbacks for the CPU-usage regulator, which is currently
		// disabled (see the commented-out call below).
		// upRun shortens both sleeps, clamped to their minimums, and stores the
		// result atomically; previously the new values were computed and logged
		// but never written back.
		Runnable upRun = () -> {
			final int upStepSleep = 100;
			int newPicSleep = this.picSleep
					.updateAndGet(v -> Math.max(v - upStepSleep, minPicSleep));
			logger.info(" --up_cpu_use: now picsleep:" + newPicSleep);
			int newArtSleep = this.artSleep
					.updateAndGet(v -> Math.max(v - upStepSleep, min_artSleep));
			logger.info(" --up_cpu_use: now artsleep:" + newArtSleep);
		};

		// reduceRun lengthens both sleeps; addAndGet makes the update atomic and
		// lets the log line show the value actually in effect.
		Runnable reduceRun = () -> {
			final int reduc_cpu_added_StepSleep = 500;
			int newPicSleep = this.picSleep.addAndGet(reduc_cpu_added_StepSleep);
			logger.info("  --reduce_cpu_use: now picsleep:" + newPicSleep);
			int newArtSleep = this.artSleep.addAndGet(reduc_cpu_added_StepSleep);
			logger.info("  --reduce_cpu_use: now artsleep:" + newArtSleep);
		};
		// perfUtil.ajdCpuUseRateV3(85,reduceRun,upRun);

		artPool = Executors.newFixedThreadPool(artPoolTaskCount);
		picPool = Executors.newFixedThreadPool(PicPoolTaskCount);

		List<String> pageUrls = getpageUrls();
		int n = 1;
		for (String pageUrl : pageUrls) {
			this.nowPageIdx = n;
			try {
				exec_singlePage(pageUrl);
			} catch (Exception e) {
				// one bad listing page must not abort the remaining pages
				logger.error("--listing page failed, url:" + pageUrl, e);
			}
			n++;
		}

		// Queued behind every article task. Presumably this shuts artPool down
		// when it runs; the wait loop below relies on artPool.isShutdown()
		// turning true (TODO confirm against PoolUtil).
		artPool.execute(PoolUtil.endTask(artPool, "artPool"));

		// Tolerance: up to this many pictures may still be outstanding when the
		// crawl is declared finished.
		final int ajd = 3;
		while (true) {
			TaskUtil.sleep_sec(1);

			if (artPool.isShutdown()
					&& rpt.nowIndex_alreadyPicCount.get() + ajd >= rpt.sumbit_pic_count
							.get()) {
				// grace period for the last downloads
				TaskUtil.sleep_sec(10);
				// NOTE(review): picPool is never shut down here, so its worker
				// threads stay alive after exec() returns. A shutdown was left
				// out deliberately: article tasks may still be submitting
				// pictures at this point and would be rejected.
				// picPool.execute(PoolUtil.endTask(picPool, "picPool"));
				break;
			}
		}

		return null;
	}

	int nowPageIdx;
	int nowArtIdx;
	int nowpicIdx;
	ExecutorService artPool;

	

	/**
	 * Fetches one listing page, extracts its article URLs and queues one
	 * article task per URL on {@code artPool}.
	 * <p>
	 * Before each submission the calling thread sleeps for the current
	 * {@code artSleep} value (skipped when it is 0). Each task updates the
	 * article counter in {@code rpt} and prints a report when it ends, whether
	 * or not it failed.
	 *
	 * @author attilax
	 * @param pageUrl URL of the listing page
	 * @since p17 g_37_c
	 */
	private void exec_singlePage(String pageUrl) {
		String listingHtml = null;
		try {
			websitex fetcher = new websitex();
			fetcher.refer = "http://www.czvv.com/";
			// third argument is presumably a timeout - TODO confirm in websitex
			listingHtml = fetcher.WebpageContent(pageUrl, "gbk", 60);
		} catch (Exception e) {
			ExUtil.throwExV3(e, "-- url:" + pageUrl);
		}

		List<String> articleUrls = getArtListByPagehtml(listingHtml);
		for (String artUrl : articleUrls) {
			logger.info("--now start process url is :" + artUrl);

			try {
				Runnable articleJob = () -> {
					try {
						processArt(artUrl);
					} catch (Exception e) {
						// a failing article is reported and otherwise ignored
						new RuntimeException("--url:" + artUrl, e)
								.printStackTrace();
					} finally {
						rpt.nowIndex_article.incrementAndGet();
						showRpt();
					}
				};

				// pace submissions
				int pauseMillis = this.artSleep.get();
				if (pauseMillis != 0) {
					TaskUtil.sleep(pauseMillis);
				}
				artPool.execute(articleJob);
				rpt.sumbit_arts_count.incrementAndGet();
			} catch (Exception e) {
				new RuntimeException("--url:" + artUrl, e).printStackTrace();
			}
		}
	}

	/**
	 * Logs two progress lines: finished/submitted counts for pictures and
	 * articles, then the CPU readings from {@code PerfUtil} together with the
	 * current picture and article sleep values.
	 */
	private void showRpt() {
		int picsDone = rpt.nowIndex_alreadyPicCount.get();
		StringBuilder progress = new StringBuilder("--***showRpt pic/picSubmit: ");
		progress.append(picsDone).append("/")
				.append(rpt.sumbit_pic_count.get());
		progress.append(",,now art/artSubmit:")
				.append(rpt.nowIndex_article.get()).append("/")
				.append(rpt.sumbit_arts_count.get());
		logger.info(progress.toString());

		// the average is sampled first and substituted into the template last
		int averageCpu = perfUtil.getCpuRate_avg();
		String template = "-- perfinfo  cpuuse:" + PerfUtil.getCpuRate_retNull()
				+ " avg_cpu_use:@avg@  picsleep(millsec):" + this.picSleep
				+ " artsleep:" + artSleep;
		logger.info(template.replace("@avg@", String.valueOf(averageCpu)));
	}

	/**
	 * Downloads one article page, reads its title through
	 * {@code ArticleService} and queues one download task per picture URL on
	 * {@code picPool}.
	 * <p>
	 * The calling thread sleeps for the current {@code picSleep} value before
	 * each submission. Every task bumps the finished-picture counter in
	 * {@code rpt} and prints a report when it ends, whether or not it failed.
	 *
	 * @param artUrl URL of the article page
	 */
	private void processArt(String artUrl) {
		String html = getArtHtml(artUrl);
		Map article = new ArticleService().process(html);
		String title = (String) article.get("title");
		List<String> pictureUrls = getPics_byHtml(html, artUrl);

		for (String picurl : pictureUrls) {
			try {
				Runnable downloadJob = () -> {
					try {
						downPic(picurl, title);
					} catch (Exception e) {
						// a failed picture is reported and otherwise ignored
						new RuntimeException("--url:" + picurl, e)
								.printStackTrace();
					} finally {
						rpt.nowIndex_alreadyPicCount.incrementAndGet();
						showRpt();
					}
				};

				// pace submissions
				int pauseMillis = this.picSleep.get();
				Thread.sleep(pauseMillis);
				picPool.execute(downloadJob);
				rpt.sumbit_pic_count.incrementAndGet();
			} catch (Exception e) {
				new RuntimeException("--url:" + picurl, e).printStackTrace();
			}
		}
	}

	/**
	 * Fetches the HTML of an article page, decoded as GBK.
	 *
	 * @param artUrl URL of the article page
	 * @return the page HTML; an empty string if the fetch timed out and
	 *         {@code ExUtil.throwExV2} returned instead of throwing
	 */
	private String getArtHtml(String artUrl) {
		websitex fetcher = new websitex();
		fetcher.refer = "http://www.czvv.com/";
		try {
			// third argument is presumably a timeout - TODO confirm in websitex
			return fetcher.WebpageContent(artUrl, "gbk", 60);
		} catch (TimeoutException e) {
			ExUtil.throwExV2(e, "--url:" + artUrl);
		}
		return "";
	}

	/**
	 * abs http://cl.cmcher.com/htm_data/16/1609/2082995.html
	 * 
	 * @author attilax 老哇的爪子
	 * @since p17 e_3_r
	 */
	// private Object getCurPageUrl(int i) {
	// String s="http://www.czvv.com/k"+ Base64. encode(kw,"utf-8")
	// +"p@pagec0cc0s0m0e0f0d0.html".replaceAll("@page", String.valueOf(i-1));
	// return s;
	// }

	/**
	 * attilax 2016年9月27日 下午4:38:55
	 * 
	 * @param artUrl
	 * @return
	 */

	/**
	 * Downloads one picture into {@code picSaveDir\<title>\<file name>}.
	 * <p>
	 * The file name comes from the URL; unless its extension is {@code jpg},
	 * {@code jpeg} or {@code png} (case-insensitive), {@code .jpg} is appended.
	 * Both the title and the file name go through
	 * {@code filex.fileNameEncode}. A previously saved file is first checked by
	 * {@link #delDownHalfFile(String)}; if it is still present afterwards the
	 * download is skipped. URLs ending in {@code .gif} are skipped as well. The
	 * saved file is checked once more after the download attempt.
	 *
	 * @param picurl URL of the picture
	 * @param title article title, used as the sub-directory name
	 */
	public void downPic(String picurl, String title) {

		logger.info("--exe downpic,title : " + title + ",,," + picurl);

		String fileName = filex.getFileName(picurl);
		String extLower = filex.getExtName(picurl).trim().toLowerCase();
		boolean keepsOwnExtension = extLower.equals("jpg")
				|| extLower.equals("jpeg") || extLower.equals("png");
		if (!keepsOwnExtension) {
			// empty, over-long and unrecognised extensions all end up here
			fileName = fileName + ".jpg";
		}

		String subDir = filex.fileNameEncode(title);
		fileName = filex.fileNameEncode(fileName);
		String savepath = this.picSaveDir + "\\" + subDir + "\\" + fileName;

		// may move a broken earlier download out of the way
		delDownHalfFile(savepath);
		if (new File(savepath).exists()
				|| picurl.trim().toLowerCase().endsWith(".gif")) {
			return;
		}

		try {
			// third argument is presumably a timeout - TODO confirm in HttpDownloader
			new HttpDownloader().down(picurl, savepath, 90);
		} catch (Exception e) {
			ExUtil.throwExV2(e, "--url:" + picurl);
		} finally {
			// re-check whatever was written; problems here are only printed
			try {
				delDownHalfFile(savepath);
			} catch (Exception e2) {
				e2.printStackTrace();
			}
		}
	}

	/**
	 * Checks an already saved picture and moves it away when it looks broken:
	 * either {@code imgx.GrayLinePercent} throws {@code PngFormatEx}, or it
	 * reports a value above 10. Does nothing if the file does not exist.
	 *
	 * @param savepath path of the saved picture
	 */
	private void delDownHalfFile(String savepath) {
		if (!new File(savepath).exists()) {
			return;
		}

		int grayPercent = 0;
		try {
			grayPercent = new imgx().GrayLinePercent(savepath);
		} catch (PngFormatEx e) {
			// grayPercent stays 0, so the check below does not move it twice
			moveAsideBrokenDownload(savepath);
		}

		if (grayPercent > 10) {
			moveAsideBrokenDownload(savepath);
		}
	}

	/** Moves the file at {@code savepath} out of the save directory and logs it. */
	private void moveAsideBrokenDownload(String savepath) {
		filex.move(savepath, "c:\\0picSaveDir_downEx" + filex.getUUidName(), this.picSaveDir);
		logger.info("--delfile:" + savepath);
	}

	/**
	 * Fetches an article page and returns the picture URLs found in it.
	 *
	 * @author attilax
	 * @param artUrl URL of the article page
	 * @return picture URLs as produced by {@link #getPics_byHtml(String, String)}
	 * @since p17 d_58_42
	 */
	private List getPics_byArtUrl(String artUrl) {
		return getPics_byHtml(getArtHtml(artUrl), artUrl);
	}

	/**
	 * Builds a picture URL from a relative path by appending it, after a
	 * {@code /}, to {@code UrlX.getPath(artUrl)}.
	 *
	 * @param artUrl URL of the article the picture belongs to
	 * @param pic relative picture path
	 * @return the joined URL
	 */
	public String getAbsUrlPic(String artUrl, String pic) {
		StringBuilder joined = new StringBuilder();
		joined.append(UrlX.getPath(artUrl));
		joined.append("/");
		joined.append(pic);
		return joined.toString();
	}

	/**
	 * Collects picture URLs from the {@code src} attribute of every
	 * {@code <input>} element, then every {@code <img>} element, of a page.
	 * An element that cannot be handled is reported on stderr and skipped.
	 *
	 * @param html page HTML
	 * @param artUrl URL of the page, passed on for resolving relative paths
	 * @return picture URLs, {@code <input>} sources first
	 */
	public List<String> getPics_byHtml(String html, String artUrl) {
		List<String> found = Lists.newArrayList();
		Document doc = Jsoup.parse(html);

		// the order of the tag names is the order of the result
		String[] pictureTags = { "input", "img" };
		for (String tag : pictureTags) {
			for (Element candidate : doc.getElementsByTag(tag)) {
				try {
					addPic2li(candidate, found, artUrl);
				} catch (Exception e) {
					e.printStackTrace();
				}
			}
		}
		return found;
	}

	/**
	 * Adds the picture referenced by an element's {@code src} attribute to the
	 * list, unless it is too short to be a real URL (fewer than 10 characters),
	 * ends in {@code .gif}, or is already in the list.
	 * <p>
	 * Sources starting with {@code http} are cleaned with
	 * {@link #clrPicUrl(String)}. Anything else is treated as a relative path: a
	 * trailing {@code -br-} marker is stripped and the path is resolved with
	 * {@link #getAbsUrlPic(String, String)}.
	 *
	 * @param element element whose {@code src} is inspected
	 * @param li list that receives the picture URL
	 * @param artUrl URL of the article, used to resolve relative paths
	 */
	private void addPic2li(Element element, List<String> li, String artUrl) {
		// trim once, so the stored URL carries no stray whitespace
		String pic = element.attr("src").trim();
		if (pic.length() < 10) {
			return;
		}

		if (pic.startsWith("http")) {
			// absolute URL
			pic = clrPicUrl(pic);
		} else {
			// relative path
			if (pic.endsWith("-br-")) {
				pic = pic.replace("-br-", "").trim();
			}
			pic = getAbsUrlPic(artUrl, pic);
		}

		if (pic.trim().toLowerCase().endsWith(".gif")) {
			return;
		}
		// Add each URL once. The relative-path branch used to follow this check
		// with a second, unconditional add, so such pictures were listed twice.
		if (!li.contains(pic)) {
			li.add(pic);
		}
	}

	/**
	 * Cleans a picture URL that ends in a literal {@code <br>}: every
	 * {@code <br>} is removed and the result trimmed. Any other URL is returned
	 * unchanged.
	 *
	 * @param pic picture URL
	 * @return the cleaned URL
	 */
	private String clrPicUrl(String pic) {
		if (!pic.endsWith("<br>")) {
			return pic;
		}
		String withoutBreaks = pic.replace("<br>", "");
		return withoutBreaks.trim();
	}

	/**
	 * Extracts article URLs from a listing page: every {@code <a>} whose
	 * {@code href} contains {@code htm_data} is turned into
	 * {@code http://cl.cmcher.com/<href>}. Duplicates are dropped and document
	 * order is kept.
	 *
	 * @author attilax
	 * @param html HTML of the listing page
	 * @return the article URLs (a list of {@code String}), possibly empty
	 * @throws RuntimeException with message {@code noRzt} if the page cannot be
	 *             parsed; the original failure is attached as the cause
	 * @since p17 d_57_m
	 */
	public List getArtListByPagehtml(String html) {

		List<String> li = Lists.newArrayList();

		try {
			Document doc = Jsoup.parse(html);
			for (Element link : doc.getElementsByTag("a")) {
				String href = link.attr("href");
				if (!href.contains("htm_data")) {
					continue;
				}
				String artUrl = "http://cl.cmcher.com/" + href;
				if (!li.contains(artUrl)) {
					li.add(artUrl);
				}
			}
		} catch (Exception e) {
			// keep the cause so the caller's log shows what actually went wrong
			throw new RuntimeException("noRzt", e);
		}

		return li;
	}

	/**
	 * Builds the listing-page URLs for pages {@code startPage} through
	 * {@code endPage}, both inclusive, in ascending order.
	 *
	 * @author attilax
	 * @return one URL per page; empty when {@code startPage > endPage}
	 * @since p17 d_55_h
	 */
	public List<String> getpageUrls() {
		final String template = "http://cl.cmcher.com/thread0806.php?fid=16&search=&page=$p$";
		List<String> urls = Lists.newArrayList();
		int page = startPage;
		while (page <= endPage) {
			urls.add(template.replace("$p$", Integer.toString(page)));
			page++;
		}
		return urls;
	}

}
